Hadley Wickham: grammar of graphics
Hans Rosling: GapMinder
Gapminder World - Wealth & Health of Nations
# ggplot
# Attach dplyr, ggplot2 and the gapminder data package, silencing the
# messages they print on load.
suppressPackageStartupMessages({
  library(dplyr)
  library(ggplot2)
  library(gapminder)
})

# preview data
gapminder
## Source: local data frame [1,704 x 6]
##
## country continent year lifeExp pop gdpPercap
## (fctr) (fctr) (int) (dbl) (int) (dbl)
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
## 7 Afghanistan Asia 1982 39.854 12881816 978.0114
## 8 Afghanistan Asia 1987 40.822 13867957 852.3959
## 9 Afghanistan Asia 1992 41.674 16317921 649.3414
## 10 Afghanistan Asia 1997 41.763 22227415 635.3414
## .. ... ... ... ... ... ...
# per-column summary, to see the range of values available in each variable
gapminder %>%
  summary()
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
# plotting data frame: population expressed in millions (pop_m),
# restricted to 2007, the most recent year in the data
g <- gapminder %>%
  mutate(pop_m = pop / 1e6) %>%
  filter(year == 2007)
# base scatterplot for the most recent year:
# GDP per capita (x) against life expectancy (y)
s <- ggplot(data = g, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
s

# map point size to population (millions)
s <- s + aes(size = pop_m)
s

# map point colour to continent
s <- s + aes(color = continent)
s

# plot title and axis labels
s <- s +
  labs(
    title = 'Health & Wealth of Nations for 2007',
    x = 'GDP per capita ($/year)',
    y = 'Life expectancy (years)'
  )
s

# legend titles for the colour and size scales
s <- s +
  scale_colour_discrete(name = 'Continent') +
  scale_size_continuous(name = 'Population (M)')
s
Your Turn
Now with country emissions datasets…
# one box per continent showing the spread of life expectancy
b <- ggplot(data = g, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot()
b

# fill each box by continent, mirroring the scatterplot's colour mapping
b <- b + aes(fill = continent)
b

# hide the legend (the x axis already names each continent),
# then set the title and axis labels
b <- b +
  theme(legend.position = 'none') +
  labs(
    title = 'Life Expectancy by Continent for 2007',
    x = 'Continent',
    y = 'Life expectancy (years)'
  )
b
Your Turn: Make a similar plot but for gdpPercap. Be sure to update the plot’s aesthetic, axis label and title accordingly.
# plotly
# Attach plotly, silencing the messages it prints on load.
suppressPackageStartupMessages({
  library(plotly) # install.packages('plotly')
})

# scatterplot (Note: key=country shows up on rollover)
# rebuilds s from scratch, so the size/colour/label layers added earlier
# are not part of this version
s <- ggplot(g, aes(x = gdpPercap, y = lifeExp, key = country)) +
  geom_point()
ggplotly(s)

# boxplot: convert the existing ggplot object b to an interactive plot
ggplotly(b)
Your Turn: Expand the interactive scatterplot to include all the other bells and whistles of the previous plot in one continuous set of code (without reassigning s in between).
# install with: devtools::install_github('timelyportfolio/explodingboxplotR')
library(explodingboxplotR)

# exploding boxplot of the 2007 data: lifeExp on y, grouped and coloured by
# continent, with country passed as the label column
exploding_boxplot(
  g,
  y = 'lifeExp',
  group = 'continent', color = 'continent',
  label = 'country'
)
The googleVis package ports most of the Google charts functionality.
Every R chunk that produces a googleVis plot must set the chunk option results='asis', and once, before any googleVis plots, you must set op <- options(gvis.plot.tag='chart').
# Attach googleVis, silencing the messages it prints on load.
suppressPackageStartupMessages({
  library(googleVis) # install.packages('googleVis')
})

# set the googleVis plot tag option; the previous option values are kept in op
op <- options(gvis.plot.tag = 'chart')

# motion chart over all years: log GDP per capita (x) vs life expectancy (y),
# one bubble per country, coloured by continent and sized by population (M).
# The data expression is deliberately left inline as the first argument.
m <- gvisMotionChart(
  gapminder %>%
    mutate(
      pop_m = pop / 1e6,
      log_gdpPercap = log(gdpPercap)
    ),
  idvar = 'country',
  timevar = 'year',
  xvar = 'log_gdpPercap',
  yvar = 'lifeExp',
  colorvar = 'continent',
  sizevar = 'pop_m'
)
plot(m)
Your Turn: Repeat the motion chart with the country having the highest gdpPercap filtered out.
tmap: Thematic maps
# install with: install.packages('tmap')
library(tmap)

# load the World spatial polygons dataset
data(World)

# look at the attribute table stored in World's data slot
tbl_df(World@data)
## Source: local data frame [177 x 15]
##
## iso_a3 name sovereignt
## (fctr) (fctr) (fctr)
## 1 AFG Afghanistan Afghanistan
## 2 AGO Angola Angola
## 3 ALB Albania Albania
## 4 ARE United Arab Emirates United Arab Emirates
## 5 ARG Argentina Argentina
## 6 ARM Armenia Armenia
## 7 ATA Antarctica Antarctica
## 8 ATF Fr. S. Antarctic Lands France
## 9 AUS Australia Australia
## 10 AUT Austria Austria
## .. ... ... ...
## Variables not shown: continent (fctr), subregion (fctr), area (dbl),
## pop_est (dbl), pop_est_dens (dbl), gdp_md_est (dbl), gdp_cap_est (dbl),
## economy (fctr), income_grp (fctr), life_exp (dbl), well_being (dbl), HPI
## (dbl)
# rows of g whose country has no exact match among World's name values,
# largest population first; these mismatches are not reconciled for now
anti_join(g, World@data, by = c(country = 'name')) %>%
  arrange(desc(pop))
## Warning in anti_join_impl(x, y, by$x, by$y): joining factors with different
## levels, coercing to character vector
## Source: local data frame [19 x 7]
##
## country continent year lifeExp pop gdpPercap
## (fctr) (fctr) (int) (dbl) (int) (dbl)
## 1 Congo, Dem. Rep. Africa 2007 46.462 64606759 277.5519
## 2 Korea, Rep. Asia 2007 78.623 49044790 23348.1397
## 3 Korea, Dem. Rep. Asia 2007 67.297 23301725 1593.0655
## 4 Yemen, Rep. Asia 2007 62.698 22211743 2280.7699
## 5 Czech Republic Europe 2007 76.486 10228744 22833.3085
## 6 Dominican Republic Americas 2007 72.235 9319622 6025.3748
## 7 Hong Kong, China Asia 2007 82.208 6980412 39724.9787
## 8 Slovak Republic Europe 2007 74.663 5447502 18678.3144
## 9 Singapore Asia 2007 79.972 4553009 47143.1796
## 10 Bosnia and Herzegovina Europe 2007 74.852 4552198 7446.2988
## 11 Central African Republic Africa 2007 44.741 4369038 706.0165
## 12 West Bank and Gaza Asia 2007 73.422 4018332 3025.3498
## 13 Congo, Rep. Africa 2007 55.322 3800610 3632.5578
## 14 Mauritius Africa 2007 72.801 1250882 10956.9911
## 15 Reunion Africa 2007 76.442 798094 7670.1226
## 16 Comoros Africa 2007 65.152 710960 986.1479
## 17 Bahrain Asia 2007 75.635 708573 29796.0483
## 18 Equatorial Guinea Africa 2007 51.579 551201 12154.0897
## 19 Sao Tome and Principe Africa 2007 65.528 199579 1598.4351
## Variables not shown: pop_m (dbl)
# rows of World's attribute table whose name has no exact match among g's
# countries, largest estimated population first; not reconciled for now
anti_join(World@data, g, by = c(name = 'country')) %>%
  arrange(desc(pop_est)) %>%
  select(iso_a3, name, pop_est)
## Warning in anti_join_impl(x, y, by$x, by$y): joining factors with different
## levels, coercing to character vector
## iso_a3 name pop_est
## 1 RUS Russia 140041247
## 2 COD Dem. Rep. Congo 68692542
## 3 KOR Korea 48508972
## 4 UKR Ukraine 45700395
## 5 UZB Uzbekistan 27606007
## 6 YEM Yemen 23822783
## 7 PRK Dem. Rep. Korea 22665345
## 8 KAZ Kazakhstan 15399437
## 9 SSD S. Sudan 10625176
## 10 CZE Czech Rep. 10211904
## 11 DOM Dominican Rep. 9650054
## 12 BLR Belarus 9648533
## 13 AZE Azerbaijan 8238672
## 14 TJK Tajikistan 7349145
## 15 LAO Laos 6834942
## 16 PNG Papua New Guinea 6057263
## 17 SVK Slovakia 5463046
## 18 KGZ Kyrgyzstan 5431747
## 19 TKM Turkmenistan 4884887
## 20 ARE United Arab Emirates 4798491
## 21 GEO Georgia 4615807
## 22 BIH Bosnia and Herz. 4613414
## 23 CAF Central African Rep. 4511488
## 24 MDA Moldova 4320748
## 25 PSE Palestine 4119083
## 26 COG Congo 4012809
## 27 LTU Lithuania 3555179
## 28 <NA> Somaliland 3500000
## 29 ARM Armenia 2967004
## 30 LVA Latvia 2231503
## 31 MKD Macedonia 2066718
## 32 <NA> Kosovo 1804838
## 33 EST Estonia 1299371
## 34 TLS Timor-Leste 1131612
## 35 FJI Fiji 944720
## 36 QAT Qatar 833285
## 37 GUY Guyana 772298
## 38 BTN Bhutan 691141
## 39 GNQ Eq. Guinea 650702
## 40 SLB Solomon Is. 595613
## 41 CYP Cyprus 531640
## 42 LUX Luxembourg 491775
## 43 SUR Suriname 481267
## 44 BRN Brunei 388190
## 45 BHS Bahamas 309156
## 46 BLZ Belize 307899
## 47 <NA> N. Cyprus 265100
## 48 NCL New Caledonia 227436
## 49 VUT Vanuatu 218519
## 50 GRL Greenland 57600
## 51 ATA Antarctica 3802
## 52 FLK Falkland Is. 3140
## 53 ATF Fr. S. Antarctic Lands 140
## 54 ESH W. Sahara NA
# attach the 2007 gapminder columns to World's attribute table, matching
# World's name to g's country; names without a match get NA in those columns
# NOTE(review): assumes the join keeps World's rows in their original order
# and count, so they still line up with the polygons -- confirm
World@data <- left_join(World@data, g, by = c(name = 'country'))
## Warning in left_join_impl(x, y, by$x, by$y): joining factors with different
## levels, coercing to character vector
# make map: World polygons filled by the joined lifeExp column using the
# RdYlGn palette, with name as the polygon id
m <- tm_shape(World) +
  tm_polygons(
    'lifeExp',
    palette = 'RdYlGn',
    id = 'name',
    title = 'Life expectancy (years)',
    # spelled out as FALSE: the shorthand F is an ordinary variable that
    # can be reassigned, FALSE is a reserved constant
    auto.palette.mapping = FALSE
  ) +
  tm_style_gray() +
  tm_format_World()
m

# show interactive map
tmap_leaflet(m)